package com.address.hive;



import org.address.AddressTool;
import org.address.DataTable;
import org.address.entity.StandardAddress;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * Hive generic UDF that links a free-text address to a standard address.
 *
 * <p>{@link #initialize} loads every row of the {@code st_address} table into an
 * {@link AddressTool}; {@link #evaluate} cleans the incoming address string, asks the tool for
 * the matching {@link StandardAddress} and returns its fields as a {@code map<string,string>}
 * with two extra keys, {@code addressLevel} and {@code linkLevel}.
 *
 * <p>History: version 6 found that some residential compounds span several street numbers
 * (a sub-AOI or a single building can have its own street number); version 7 improves on this.
 */
public class AddressLink extends GenericUDF {

    // NOTE(review): connection settings are hard-coded (and masked) in the source. They should be
    // read from configuration rather than compiled into the jar - confirm the intended source.
    // Any JDBC-accessible database can hold the standard addresses; PostgreSQL is the example.
    private static final String JDBC_DRIVER = "org.postgresql.Driver";
    private static final String JDBC_URL = "jdbc:postgresql://*****:5432/postgres";
    private static final String JDBC_USER = "******";
    private static final String JDBC_PASSWORD = "******";

    private static final String LOAD_SQL = "select id,province,city,county,town,community,road,road_no,aoi,sub_aoi,building,unit,room,address from st_address order by aoi,road,road_no";

    /** Columns copied from each result row into the dictionary handed to {@link DataTable}. */
    private static final String[] COLUMNS = {
        "id", "province", "city", "county", "town", "community", "road", "road_no",
        "aoi", "sub_aoi", "building", "unit", "room", "address"
    };

    /** Inspector used to read the single string argument. */
    private PrimitiveObjectInspector addressIO;

    /** Matcher shared by all instances; replaced only after a complete, successful load. */
    private static AddressTool addressTool;

    /**
     * Validates the argument list and loads the standard-address data.
     *
     * @param arguments inspectors of the call arguments; exactly one primitive is expected
     * @return a map inspector with string keys and string values
     * @throws UDFArgumentLengthException if the call does not have exactly one argument
     * @throws UDFArgumentException if the argument is not primitive or the data cannot be loaded
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentLengthException(
                    "AddressLink accepts exactly 1 argument (the address string), got "
                            + arguments.length);
        }
        if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
            throw new UDFArgumentException(
                    "AddressLink expects a primitive (string) argument, got "
                            + arguments[0].getTypeName());
        }
        addressIO = (PrimitiveObjectInspector) arguments[0];

        addressTool = loadAddressTool();

        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    /**
     * Links one address.
     *
     * @param arguments the single deferred argument holding the raw address
     * @return the standard-address fields plus {@code addressLevel} and {@code linkLevel}, or
     *     {@code null} when the input is {@code null}
     * @throws HiveException if the deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        // Resolve the deferred value once; it may be computed lazily.
        Object raw = arguments[0].get();
        if (raw == null) {
            return null;
        }
        String address = PrimitiveObjectInspectorUtils.getString(raw, this.addressIO);
        if (address == null) {
            return null;
        }

        StandardAddress stdAddress = addressTool.getStdAddress(normalizeAddress(address));
        Map<String, String> result = stdAddress.getStdAddress();

        // Address granularity; "未知" when the tool reports none.
        result.put("addressLevel", defaultIfEmpty(stdAddress.addressLevel, "未知"));
        // Level at which the link was made; "未关联" when the address was not linked.
        result.put("linkLevel", defaultIfEmpty(stdAddress.linkLevel, "未关联"));

        return result;
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Address(" + children[0] + ")";
    }

    /**
     * Reads all rows of {@code st_address} and builds a fully initialised {@link AddressTool}.
     *
     * @throws UDFArgumentException if the driver is missing, the query fails or the data cannot
     *     be handed to the tool; the original exception is attached as the cause
     */
    private static AddressTool loadAddressTool() throws UDFArgumentException {
        AddressTool tool = new AddressTool();
        DataTable data = new DataTable();
        try {
            // Register the driver explicitly; automatic discovery is not relied upon here.
            Class.forName(JDBC_DRIVER);
            // try-with-resources closes result set, statement and connection on every path.
            try (Connection connection =
                            DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
                    Statement statement = connection.createStatement();
                    ResultSet rows = statement.executeQuery(LOAD_SQL)) {
                while (rows.next()) {
                    data.addAddressDic(readRow(rows));
                }
            }
            // Hand the collected standard addresses over to the tool.
            data.initData(tool);
        } catch (Exception e) {
            // Fail loudly: continuing without data would silently report every row as unlinked.
            UDFArgumentException failure = new UDFArgumentException(
                    "AddressLink failed to load standard addresses from st_address: " + e);
            failure.initCause(e);
            throw failure;
        }
        return tool;
    }

    /**
     * Copies the non-empty columns of the current row into a map keyed by column name, with the
     * unit-word suffixes of building / unit / room removed.
     */
    private static HashMap<String, String> readRow(ResultSet rows) throws Exception {
        HashMap<String, String> row = new HashMap<>();
        for (String column : COLUMNS) {
            String value = rows.getString(column);
            if (value == null || value.isEmpty()) {
                continue;
            }
            row.put(column, normalizeComponent(column, value));
        }
        return row;
    }

    /** Applies the column-specific suffix stripping; other columns pass through unchanged. */
    private static String normalizeComponent(String column, String value) {
        switch (column) {
            case "building":
                return bld(value);
            case "unit":
                return unit(value);
            case "room":
                return room(value);
            default:
                return value;
        }
    }

    /** Drops a trailing "栋", "幢" or "号楼" from a building label. */
    private static String bld(String building) {
        if (building == null) {
            return null;
        }
        if (building.endsWith("栋") || building.endsWith("幢")) {
            return dropLast(building, 1);
        }
        if (building.endsWith("号楼")) {
            return dropLast(building, 2);
        }
        return building;
    }

    /** Drops a trailing "单元" from a unit label. */
    private static String unit(String unit) {
        if (unit != null && unit.endsWith("单元")) {
            return dropLast(unit, 2);
        }
        return unit;
    }

    /** Drops a trailing "室" or "户" from a room label. */
    private static String room(String room) {
        if (room != null && (room.endsWith("室") || room.endsWith("户"))) {
            return dropLast(room, 1);
        }
        return room;
    }

    /** Returns {@code value} without its last {@code count} characters. */
    private static String dropLast(String value, int count) {
        return value.substring(0, value.length() - count);
    }

    /**
     * Cleans irregular characters in a raw address: removes every ASCII space, turns each
     * "——" pair into "-", then collapses any run of "-" into a single "-".
     *
     * <p>The em-dash replacement runs before the collapse so that a "-" produced by it cannot
     * leave a "--" behind. Hyphen/space combinations need no separate handling because all
     * spaces are removed first.
     */
    private static String normalizeAddress(String address) {
        String cleaned = address.replace(" ", "").replace("——", "-");
        // "---" becomes "--" after one pass, so repeat until no pair is left.
        while (cleaned.contains("--")) {
            cleaned = cleaned.replace("--", "-");
        }
        return cleaned;
    }

    /** Returns {@code fallback} when {@code value} is {@code null} or empty. */
    private static String defaultIfEmpty(String value, String fallback) {
        return (value == null || value.isEmpty()) ? fallback : value;
    }

}

